tlc <- read_parquet(file = "./data/yellow_tripdata_2022-06.parquet")
tlc_zone <- st_read("./data/taxi_zones/taxi_zones.shp", quiet = TRUE)
ggplot(tlc_zone) + geom_sf() + theme_inset() + theme_bw()

tlc_zone <- st_transform(tlc_zone, crs = 4326)
ggplot(tlc_zone) + geom_sf(extent ="device")

our_neighbourhood <- tlc_zone %>%
  filter(zone == "Gramercy" | zone == "Kips Bay")

ggplot(tlc_zone) + geom_sf() +
  theme_inset() +
   geom_sf(data = our_neighbourhood, fill = "red")

bbox <- st_bbox(tlc_zone) %>% as.numeric

nyc_map <- get_stamenmap(bbox = bbox, messaging = FALSE, zoom = 11,
                         maptype = "toner-lite", format = c("png"))

ggmap(nyc_map) +
  geom_sf(data = our_neighbourhood, fill = "red", inherit.aes = FALSE)

pu_count <- tlc %>% group_by(PULocationID) %>% tally()
pu_count
## # A tibble: 261 x 2
##    PULocationID     n
##           <int> <int>
##  1            1  1027
##  2            2     3
##  3            3    40
##  4            4  4267
##  5            5    58
##  6            6     7
##  7            7  2241
##  8            8    24
##  9            9    40
## 10           10  1433
## # ... with 251 more rows
pu_count <- pu_count %>% rename(LocationID = PULocationID) 
joined_tbl <- left_join(tlc_zone, pu_count)

ggplot(joined_tbl) + geom_sf() + aes(fill = n)

ggplot(joined_tbl %>% filter(borough == "Manhattan")) + geom_sf() + aes(fill = n)

ggplot(joined_tbl %>% filter(borough == "Manhattan")) + geom_sf() + aes(fill = n) + scale_fill_viridis_c(option = "A")

bbox <- st_bbox(tlc_zone) %>% as.numeric

nyc_map <- get_stamenmap(bbox = bbox, messaging = FALSE, zoom = 11,
                         maptype = "toner-lite", format = c("png"))
ggmap(nyc_map)

ggmap(nyc_map) +
  geom_sf(data = our_neighbourhood, fill = "red", inherit.aes = FALSE)

ggmap(nyc_map) +
  geom_sf(data = joined_tbl, aes(fill = n), inherit.aes = FALSE) + scale_fill_viridis_c(option = "A")

tlc_zone_new <- left_join(tlc_zone, pu_count, by = "LocationID") %>% filter(borough == "Manhattan")
plot(our_neighbourhood)

random_locs <- st_sample(our_neighbourhood, type = "random", size = 10)
ggplot() + geom_sf(data = our_neighbourhood) + geom_sf(data = random_locs)

storage <- list()
map_output <- ggmap(nyc_map)
for (zone_id in 1:nrow(tlc_zone_new)){
  zone <- tlc_zone_new[zone_id, ]
  zone$n[is.na(zone$n)] <- 0
  sampled_points <- zone %>% st_sample(type = "random", size = round(zone$n/100))
  storage[[zone_id]] <- sampled_points
  map_output <- map_output + geom_sf(data = storage[[zone_id]], inherit.aes = FALSE, size = 0.5, alpha = 0.1)
  
}
map_output